{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "[this doc on github](https://github.com/dotnet/interactive/tree/main/samples/notebooks/fsharp/Samples)\n",
    "\n",
    "### Referencing the package\n",
    "\n",
    "\n",
    "First, get the package and open the namespaces:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "dotnet_interactive": {
     "language": "fsharp"
    }
   },
   "outputs": [],
   "source": [
    "#r \"nuget:Microsoft.Data.Analysis, 0.21.0\"\n",
    "#r \"nuget: Plotly.NET.Interactive, 4.2.0\"\n",
    "#r \"nuget: Plotly.Net, 4.2.0\"\n",
    "\n",
    "open Microsoft.Data.Analysis"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Creating a data frame\n",
    "\n",
    "Create 3 columns to hold values of types `DateTime`, `int`, and `string`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "dotnet_interactive": {
     "language": "fsharp"
    }
   },
   "outputs": [],
   "source": [
    "let dateTimes = DateTimeDataFrameColumn(\"DateTimes\") // Default length is 0.\n",
    "let ints = Int32DataFrameColumn(\"Ints\", 6L) // Makes a column of Length 3. Filles with nulls initially.\n",
    "let strings = StringDataFrameColumn(\"Strings\", 6L)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Add some datetimes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "dotnet_interactive": {
     "language": "fsharp"
    }
   },
   "outputs": [],
   "source": [
    "dateTimes.Append(DateTime.Parse(\"2019/01/01\"));\n",
    "dateTimes.Append(DateTime.Parse(\"2019/01/01\"));\n",
    "dateTimes.Append(DateTime.Parse(\"2019/01/02\"));\n",
    "dateTimes.Append(DateTime.Parse(\"2019/02/02\"));\n",
    "dateTimes.Append(DateTime.Parse(\"2019/02/02\"));\n",
    "dateTimes.Append(DateTime.Parse(\"2019/03/02\"));"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create a `DataFrame` with 3 columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "dotnet_interactive": {
     "language": "fsharp"
    }
   },
   "outputs": [],
   "source": [
    "let df = DataFrame([dateTimes; ints; strings]: DataFrameColumn list)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Adding better default formatting for data frames\n",
    "\n",
    "Create a formatter for data frames and data frame rows."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "dotnet_interactive": {
     "language": "fsharp"
    }
   },
   "outputs": [],
   "source": [
    "module DateFrameFormatter = \n",
    "    \n",
    "    // Locally open the F# HTML DSL.\n",
    "    open Html\n",
    "\n",
    "    let maxRows = 20\n",
    "\n",
    "    Formatter.Register<DataFrame>((fun (df: DataFrame) (writer: TextWriter) ->\n",
    "\n",
    "        let take = 20\n",
    "        table [] [\n",
    "          thead [] [\n",
    "            th [] [ str \"Index\" ]\n",
    "            for c in df.Columns do\n",
    "              th [] [ str c.Name]\n",
    "          ]\n",
    "          tbody [] [\n",
    "            for i in 0 .. min maxRows (int df.Rows.Count - 1) do\n",
    "              tr [] [\n",
    "                td [] [ i ]\n",
    "                for o in df.Rows.[int64 i] do\n",
    "                  td [] [ o ]\n",
    "              ]\n",
    "          ]\n",
    "        ]\n",
    "        |> writer.Write\n",
    "\n",
    "    ), mimeType = \"text/html\")\n",
    "    \n",
    "    Formatter.Register<DataFrameRow>((fun (row: DataFrameRow) (writer: TextWriter) ->\n",
    "\n",
    "        table [] [\n",
    "          tbody [] [\n",
    "            tr [] [\n",
    "              for o in row do\n",
    "                td [] [ o ] \n",
    "            ]\n",
    "          ]\n",
    "        ]\n",
    "        |> writer.Write\n",
    "\n",
    "    ), mimeType = \"text/html\")\n",
    "    "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now view the data frame: "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "dotnet_interactive": {
     "language": "fsharp"
    }
   },
   "outputs": [],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Modifying data frames\n",
    "\n",
    "Change a value directly through df:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "dotnet_interactive": {
     "language": "fsharp"
    }
   },
   "outputs": [],
   "source": [
    "df.[0L, 1] <- 10\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We can also modify the values in the columns through indexers defined in `PrimitiveDataColumn` and `StringColumn`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "dotnet_interactive": {
     "language": "fsharp"
    }
   },
   "outputs": [],
   "source": [
    "ints.[1L] <- Nullable 100\n",
    "strings.[1L] <- \"Foo!\"\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Check the data type"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "dotnet_interactive": {
     "language": "fsharp"
    }
   },
   "outputs": [],
   "source": [
    "df.Info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The `DataFrame` and the base `DataFrameColumn` class that all columns derive from expose a number of useful APIs: binary operations, computations, joins, merges, handling missing values and more."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "dotnet_interactive": {
     "language": "fsharp"
    }
   },
   "outputs": [],
   "source": [
    "df.[\"Ints\"].Add(5, inPlace=true)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "dotnet_interactive": {
     "language": "fsharp"
    }
   },
   "outputs": [],
   "source": [
    "df.[\"Ints\"] <- (ints / 5) * 20\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's `null` it up!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "dotnet_interactive": {
     "language": "fsharp"
    }
   },
   "outputs": [],
   "source": [
    "df.[\"Ints\"].FillNulls(100, inPlace=true)\n",
    "df.[\"Strings\"].FillNulls(\"Bar\", inPlace=true)\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "DataFrame exposes `Columns` property that we can enumerate over to access our columns. Here's how you can access the first row, though."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "dotnet_interactive": {
     "language": "fsharp"
    }
   },
   "outputs": [],
   "source": [
    "let row0 = df.Rows.[0L]\n",
    "row0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "dotnet_interactive": {
     "language": "fsharp"
    }
   },
   "outputs": [],
   "source": [
    "row0"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Filtering and sorting data frames\n",
    "\n",
    "Let's take a look at `Filter`, `Sort`, and `GroupBy`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "dotnet_interactive": {
     "language": "fsharp"
    }
   },
   "outputs": [],
   "source": [
    "// Sort our dataframe using the Ints column\n",
    "df.OrderBy(\"Ints\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "dotnet_interactive": {
     "language": "fsharp"
    }
   },
   "outputs": [],
   "source": [
    "//Clone dataframe\n",
    "let newDf = df.Clone();\n",
    "\n",
    "//Add new column\n",
    "let newColumn = new StringDataFrameColumn(\"Month\", (df[\"DateTimes\"] :?> DateTimeDataFrameColumn |> Seq.map (fun x -> x.Value.ToString \"MMMM\")) |> Seq.toArray);\n",
    "newDf.Columns.Add(newColumn);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "dotnet_interactive": {
     "language": "fsharp"
    }
   },
   "outputs": [],
   "source": [
    "// GroupBy\n",
    "let grouped = newDf.GroupBy(\"Month\")\n",
    "\n",
    "//Show grouped data\n",
    "let groupedDf = grouped.Head(10);\n",
    "groupedDf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "dotnet_interactive": {
     "language": "fsharp"
    },
    "polyglot_notebook": {
     "kernelName": "fsharp"
    }
   },
   "outputs": [],
   "source": [
    "let intGroupSum = grouped.Sum(\"Ints\");\n",
    "intGroupSum"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Charting columns from data frames"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "dotnet_interactive": {
     "language": "fsharp"
    }
   },
   "outputs": [],
   "source": [
    "open Plotly.NET\n",
    "open System.Linq"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "dotnet_interactive": {
     "language": "fsharp"
    }
   },
   "outputs": [],
   "source": [
    "display(Chart2D.Chart.Column<int, string, string, string, string>((intGroupSum[\"Ints\"] :?> Int32DataFrameColumn |> Seq.map (fun x -> if x = Nullable() then 0 else x.Value)), intGroupSum[\"Month\"] :?> StringDataFrameColumn ))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "dotnet_interactive": {
     "language": "fsharp"
    }
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".NET (C#)",
   "language": "C#",
   "name": ".net-csharp"
  },
  "language_info": {
   "name": "polyglot-notebook"
  },
  "polyglot_notebook": {
   "kernelInfo": {
    "defaultKernelName": "csharp",
    "items": [
     {
      "aliases": [],
      "name": ".NET"
     },
     {
      "aliases": [
       "C#",
       "c#"
      ],
      "languageName": "C#",
      "name": "csharp"
     },
     {
      "aliases": [
       "F#",
       "f#"
      ],
      "languageName": "F#",
      "name": "fsharp"
     },
     {
      "aliases": [],
      "languageName": "HTML",
      "name": "html"
     },
     {
      "aliases": [],
      "languageName": "KQL",
      "name": "kql"
     },
     {
      "aliases": [],
      "languageName": "Mermaid",
      "name": "mermaid"
     },
     {
      "aliases": [
       "powershell"
      ],
      "languageName": "PowerShell",
      "name": "pwsh"
     },
     {
      "aliases": [],
      "languageName": "SQL",
      "name": "sql"
     },
     {
      "aliases": [],
      "name": "value"
     },
     {
      "aliases": [
       "frontend"
      ],
      "name": "vscode"
     }
    ]
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
